package Parser

import (
	"regexp"
	"strconv"
	"strings"

	"github.com/PuerkitoBio/goquery"
	"xiaoshuo/engine"
)


// idRe captures the numeric book id from a 17k.com book URL. The dots are
// escaped so that they match only a literal '.' rather than any character.
var idRe = regexp.MustCompile(`https://www\.17k\.com/book/([0-9]+)\.html`)
// ContentParse extracts the final novel information from document and returns
// it as the Item of an engine.ParserResult.
//
// Extraction is best-effort: a node or attribute that is missing from the
// document leaves the corresponding field at its zero value, except Id, which
// falls back to "0" when the reading URL does not match idRe. Type is always
// set to "xiaoshuo".
func ContentParse(document *goquery.Document) engine.ParserResult {
	content := engine.Profile{Type: "xiaoshuo"}

	// Reading address: href of the first link inside .alltextleft.
	// When the link or its href is missing, Url stays empty instead of
	// becoming the bare scheme "https:".
	if href, ok := document.Find(".alltextleft a").Eq(0).Attr("href"); ok && href != "" {
		if strings.HasPrefix(href, "http://") || strings.HasPrefix(href, "https://") {
			// Already absolute: prefixing again would yield "https:https://...".
			content.Url = href
		} else {
			// presumably a protocol-relative href ("//host/path") — TODO confirm
			content.Url = "https:" + href
		}
	}

	// Book id: captured from the reading URL, "0" when it cannot be found.
	content.Id = "0"
	if m := idRe.FindStringSubmatch(content.Url); len(m) >= 2 {
		content.Id = m[1]
	}

	// Cover image; Attr yields "" when src is absent, which is the desired
	// fallback, so the existence flag is intentionally ignored.
	content.Cover, _ = document.Find(".alltextleft a img").Eq(0).Attr("src")

	// Title.
	content.Title = document.Find(".alltextmiddle dl dt a").Eq(0).Text()

	rows := document.Find(".alltextmiddle dl dd ul li")

	// The first row holds author, kind and a numeric counter, one per <span>.
	spans := rows.Eq(0).Find("span")
	content.Author = spans.Eq(0).Find("a").Text()
	content.Kind = spans.Eq(1).Find("a").Text()

	// NOTE(review): the original comment called this value the read count,
	// yet it is stored in WordNum — confirm which one the page really shows.
	// Surrounding whitespace is trimmed so it cannot make Atoi fail; a value
	// that still does not parse leaves WordNum at 0.
	if n, err := strconv.Atoi(strings.TrimSpace(spans.Eq(2).Find("code").Text())); err == nil {
		content.WordNum = n
	}

	// Description (third row) and last update time (fourth row).
	content.Desc = rows.Eq(2).Find("p a").Text()
	content.Updated = rows.Eq(3).Find("cite").Text()

	return engine.ParserResult{Item: content}
}
